clear
set more off
cap log close

***************************************************************************************************
* Program: figure5_figureS4.do
* Purpose: Shuffle the prison-ZCTA contact matrix for placebo analysis, 
* and estimate event study coefficients using the CSDID estimator
***************************************************************************************************

************************************************************************
** set macros (directory)
************************************************************************
global prison_dir "~/Dropbox/Prison_Covid/PNAS_Nexus_Replication"
global raw_data "${prison_dir}/raw_data"
global intermediate_data "${prison_dir}/intermediate_data"
global code "${prison_dir}/code"
global analysis_data "${prison_dir}/analysis_data"
global output "${prison_dir}/output"

* Variables 
global demo "pct_nh_blk_alone_acs_15_19 pct_nh_white_alone_acs_15_19 pct_hisp_latino_acs_15_19 pct_college_acs_15_19 med_hhc_inc_acs_15_19 pct_pop_65plus_acs_15_19 log_tot_pop_acs_15_19"
global cubic "ZCTA_max_cases_p100k_rmpri5 ZCTA_max_cases_p100k_rmpri5_sq ZCTA_max_cases_p100k_rmpri5_cu"

global covidXwhite "cum_cases5_X_pct_white cases_5_X_pct_white cases_sq_5_X_pct_white cases_cu_5_X_pct_white"
global covidXinc "cum_cases5_X_inc cases_5_X_inc cases_sq_5_X_inc cases_cu_5_X_inc"	


************************************************************************
* Program to shuffle contact, define treatment status,
* and store shuffled results
************************************************************************

cap program drop store_shuffled_result 
program define store_shuffled_result 

	// argument is the number of draws/shuffles 
	syntax, num(integer)
		
	* initiate the result table 
	cap frame change default 
	cap frame drop results 
	frame create results
	frame results{
		
		set obs `num'
		
		* save csdid coefficients and standard errors: Matched on covid  variables 
		gen covid_t_3_5 = .
		gen covid_t_4_5 = .
		gen covid_t_5_6 = .
		gen covid_t_5_7 = .
		gen covid_t_5_8 = .
		gen covid_t_5_9 = .
		gen covid_t_5_10 = .
		gen covid_t_5_11 = .
		gen covid_t_5_12 = .
		
		gen covid_se_3_5 = .
		gen covid_se_4_5 = .
		gen covid_se_5_6 = .
		gen covid_se_5_7 = .
		gen covid_se_5_8 = .
		gen covid_se_5_9 = .
		gen covid_se_5_10 = .
		gen covid_se_5_11 = .
		gen covid_se_5_12 = .
		
		* save csdid coefficients and standard errors: Matched on covid + demo  variables 
		gen co_demo_t_3_5 = .
		gen co_demo_t_4_5 = .
		gen co_demo_t_5_6 = .
		gen co_demo_t_5_7 = .
		gen co_demo_t_5_8 = .
		gen co_demo_t_5_9 = .
		gen co_demo_t_5_10 = .
		gen co_demo_t_5_11 = .
		gen co_demo_t_5_12 = .
		
		gen co_demo_se_3_5 = .
		gen co_demo_se_4_5 = .
		gen co_demo_se_5_6 = .
		gen co_demo_se_5_7 = .
		gen co_demo_se_5_8 = .
		gen co_demo_se_5_9 = .
		gen co_demo_se_5_10 = .
		gen co_demo_se_5_11 = .	
		gen co_demo_se_5_12 = .	
		
	}

	* loop for `num' draws 
	forval i=1/`num'{
		
		display "`i'"
		preserve 

			* permute the contact matrix between ZCTA-Prison (either San Quentin OR others)
			shufflevar hr_pri_fr_home, cluster(NAME_X_month)
			
			rename hr_pri_fr_home_shuffled hr_shuffled 
			
			* identify placebo treatment/control group 
			
			* using hr_shuffled in June to define treatment/control 
			gen hr_shuffled_6_ = hr_shuffled if month == 6
			gegen hr_shuffled_6 = max(hr_shuffled_6_), by(ZCTA_contact)

			gen treated = (hr_shuffled_6 > 0)
			gen first_treated = 6 * (hr_shuffled_6 > 0) // month that ZCTA is first treated
			
			
			* csdid estimation
			qui csdid ZCTA_max_cases_p100k_rmpri $cubic cum_max_cases_p100k_rmpri5 log_tot_pop_acs_15_19, ivar(ZCTA_contact) time(month) gvar(first_treated) method(dripw) long	
			frame results{
				replace covid_t_3_5 = e(b)[1,1] in `i'
				replace covid_t_4_5 = e(b)[1,2] in `i'
				replace covid_t_5_6 = e(b)[1,3] in `i'
				replace covid_t_5_7 = e(b)[1,4] in `i'
				replace covid_t_5_8 = e(b)[1,5] in `i'
				replace covid_t_5_9 = e(b)[1,6] in `i'
				replace covid_t_5_10 = e(b)[1,7] in `i'
				replace covid_t_5_11 = e(b)[1,8] in `i'
				replace covid_t_5_12 = e(b)[1,9] in `i'
				
				replace covid_se_3_5 = sqrt(e(V)[1, 1]) in `i'
				replace covid_se_4_5 = sqrt(e(V)[2, 2]) in `i'
				replace covid_se_5_6 = sqrt(e(V)[3, 3]) in `i'
				replace covid_se_5_7 = sqrt(e(V)[4, 4]) in `i'
				replace covid_se_5_8 = sqrt(e(V)[5, 5]) in `i'
				replace covid_se_5_9 = sqrt(e(V)[6, 6]) in `i'
				replace covid_se_5_10 = sqrt(e(V)[7, 7]) in `i'
				replace covid_se_5_11 = sqrt(e(V)[8, 8]) in `i'
				replace covid_se_5_12 = sqrt(e(V)[9, 9]) in `i'
			
			}
			
			qui csdid ZCTA_max_cases_p100k_rmpri $cubic cum_max_cases_p100k_rmpri5 $demo, ivar(ZCTA_contact) time(month) gvar(first_treated) method(dripw) long
			frame results{
				replace co_demo_t_3_5 = e(b)[1,1] in `i'
				replace co_demo_t_4_5 = e(b)[1,2] in `i'
				replace co_demo_t_5_6 = e(b)[1,3] in `i'
				replace co_demo_t_5_7 = e(b)[1,4] in `i'
				replace co_demo_t_5_8 = e(b)[1,5] in `i'
				replace co_demo_t_5_9 = e(b)[1,6] in `i'
				replace co_demo_t_5_10 = e(b)[1,7] in `i'
				replace co_demo_t_5_11 = e(b)[1,8] in `i'
				replace co_demo_t_5_12 = e(b)[1,9] in `i'
				
				replace co_demo_se_3_5 = sqrt(e(V)[1, 1]) in `i'
				replace co_demo_se_4_5 = sqrt(e(V)[2, 2]) in `i'
				replace co_demo_se_5_6 = sqrt(e(V)[3, 3]) in `i'
				replace co_demo_se_5_7 = sqrt(e(V)[4, 4]) in `i'
				replace co_demo_se_5_8 = sqrt(e(V)[5, 5]) in `i'
				replace co_demo_se_5_9 = sqrt(e(V)[6, 6]) in `i'
				replace co_demo_se_5_10 = sqrt(e(V)[7, 7]) in `i'
				replace co_demo_se_5_11 = sqrt(e(V)[8, 8]) in `i'
				replace co_demo_se_5_12 = sqrt(e(V)[9, 9]) in `i'

			}
			
			
		restore 
	}

	* change frame to the results 
	frame change results

	* create confidence interval for the estimator
	forval i = 3/4{
		gen lb_`i'_5 = covid_t_`i'_5 - 1.96 * covid_se_`i'_5
		gen ub_`i'_5 = covid_t_`i'_5 + 1.96 * covid_se_`i'_5
		gen demo_lb_`i'_5 = co_demo_t_`i'_5 - 1.96 * co_demo_se_`i'_5
		gen demo_ub_`i'_5 = co_demo_t_`i'_5 + 1.96 * co_demo_se_`i'_5		
		
	}

	forval i = 6/12{
		gen lb_5_`i' = covid_t_5_`i' - 1.96 * covid_se_5_`i'
		gen ub_5_`i' = covid_t_5_`i' + 1.96 * covid_se_5_`i'
		gen demo_lb_5_`i' = co_demo_t_5_`i' - 1.96 * co_demo_se_5_`i'
		gen demo_ub_5_`i' = co_demo_t_5_`i' + 1.96 * co_demo_se_5_`i'
	}
	
end 


	
******************************************************
* placebo test: shuffle contact
******************************************************

* read data 
use "${analysis_data}/San_Quentin_main_analysis_data.dta", clear 
drop first_treated treated
* create cluster for shuffle
egen NAME_X_month = group(NAME month)
egen NAME_X_ZCTA = group(NAME ZCTA_contact)
	
* shuffle
// set seed 12
store_shuffled_result, num(1000)

* save shuffle result into data
save "${intermediate_data}/SQ_shuffle_contact_matrix_csdid_result.dta", replace 

******************************************************
* Plot permutation estimates
******************************************************

use "${intermediate_data}/SQ_shuffle_contact_matrix_csdid_result.dta", clear

* create z-statistics = coef / se 
forval i = 3/4{
	gen z_`i'_5 = covid_t_`i'_5 / covid_se_`i'_5
	gen demo_z_`i'_5 = co_demo_t_`i'_5 / co_demo_se_`i'_5	
	
}

forval i = 6/12{
	gen z_5_`i' = covid_t_5_`i' / covid_se_5_`i'
	gen demo_z_5_`i' = co_demo_t_5_`i' / co_demo_se_5_`i'
	
}

* capped histogram 
gen covid_t_5_8_cap = covid_t_5_8
replace covid_t_5_8_cap = -300 if covid_t_5_8 < -300


local att_covid_demo = 81.2
count if co_demo_t_5_8 > `att_covid_demo'
hist co_demo_t_5_8, freq bin(100) xline(`att_covid_demo', lcolor(red)) title("Permutation Result: Matched on COVID-19 Variables and Demographics", size(medium)) ///
	xtitle("Doubly-robust Estimate of Effect in August") ///
	text(38 -100 "N. estimates > `att_covid_demo': `=r(N)' (out of 1000 permutations)", place(e)) 
graph export "${output}/figure5.png", replace

local att_covid = 53
count if covid_t_5_8 > `att_covid'
hist covid_t_5_8_cap, freq bin(100) xline(`att_covid', lcolor(red)) title("Permutation Result: Matched on COVID-19 Variables", size(medium)) ///
	xtitle("Doubly-robust Estimate of Effect in August")  ///
	xlabel(-300 "<=-300" -200 "-200" -100 "-100" 0 "0" 100 "100" 200 "200" 300 "300") ///
	text(50 -300 "N. estimates > `att_covid': `=r(N)' (out of 1000 permutations)", place(e))
graph export "${output}/figureS4.png", replace
